keep = ls()

#######################################
#Merge data for GOP Voteshare analysis#
#######################################

#Census data
census = fread('./data/county_elections/cleaned/census_data_wide.csv')
use_idx = names(census) %>% str_detect('(^pct_from_in)|(^pct_target_in)') %>% !. 
census = census[, .SD, .SDcols = names(census)[use_idx]]

#Election data (Federal)  
elections = fread('./data/county_elections/cleaned/elections_wide.csv')
elections[, vs_gop_alt_1878_PRES := NULL]

#Enlistment data
enlistment = fread('./data/county_elections/cleaned/county_enlistment_use.csv')

#Merge
setkey(enlistment, state_1860, county_1860)
setkey(census, target_state, target_name)
setkey(elections, target_state, target_name)

merged = census[enlistment] %>% elections[.]

#Create enlistment rates
merged[, vet_pct := veterans / mil_age_male_1860]
merged[, vet_alt_pct := veterans_alt / mil_age_male_1860]
merged[, vet_s_pct := veterans_s / mil_age_male_1860]

#reshape to Panel
merged_panel = melt.data.table(merged, 
                               id.vars = c("target_state", "target_name", "target_fips", 'vet_pct', 'vet_alt_pct', 'vet_s_pct'),
                               measure.vars = patterns("vs_gop"),
                               variable.name = 'election_name',
                               value.name = 'gop_vs')

#Election attributes
merged_panel[, election_year := str_extract(election_name, "\\d{4}")]
merged_panel[, election_type := str_extract(election_name, "[A-Z]+$")]
merged_panel[, election_alt := str_detect(election_name, 'alt')]
merged_panel[, state_election := paste(target_state,election_year,election_type, sep = "_") %>% as.factor]

#GOP not running: flag
merged_panel[, gop_vs_na := is.na(gop_vs) | (gop_vs == 0)]
merged_panel[(gop_vs_na), gop_vs := 0]

#War indicator
merged_panel[, postbellum := 1*(election_year > 1860)]

#Year factor (sets 1860 as comparison)
year_levels = c(1860, seq(1854,1858,2), seq(1862,1920,2)) %>% as.character()
merged_panel[, year_f := factor(election_year, levels = year_levels)]

#Estimation Sample
#non-zero votes cast, 
#federal elections, 
#enlistment rate in (0,1)
#calculate denominator in voteshare by summing votes cast

merged_panel[, sample := is.finite(gop_vs) & 
               election_type %in% c("PRES", 'CONG') & 
               vet_alt_pct %between% c(0,1) &
               (election_alt)]

#Any missing GOP VS in 1854:1860
merged_panel[(sample), any_na := gop_vs_na[election_year %in% 1854:1860] %>% any(), by = list(target_state, target_name)]  

##########
#Figure 1#
##########

#Drop Maine because it is missing congressional vote in 1856,1858
#Drop 1854, because Republican party not on the ballot in all states
parallel_use = merged_panel[target_state != "MAINE" &
                            (sample) &
                            election_year %in% 1856:1880]

#Get average voteshare in that year for a county (average Pres and Cong)
parallel_use = parallel_use[, list(gop_vs = mean(gop_vs, na.rm = T)), 
                            by = list(target_state, target_name, election_year, vet_alt_pct)]

#Drop if there are 0/NA for GOP in 1856:1860
parallel_use[, drop := any(gop_vs[election_year %in% 1856:1860] %in% 0) | 
                        any(is.na(gop_vs[election_year %in% 1856:1860])),
             by = list(target_state, target_name)]
parallel_use = parallel_use[!(drop)]

#Get enlistment quantiles within states
q = 4
parallel_use[, vet_q := vet_alt_pct %>% 
               cut(., 
                   breaks =  c(0,quantile(vet_alt_pct, (1:q)/q, na.rm = T)), 
                   labels = paste0('q', 1:q)), 
             by = target_state]
parallel_use[, postbellum := election_year > 1861]
parallel_use[, u_county := paste(target_state, target_name, sep = ":")]
parallel_use[, state_year := paste(target_state, election_year, sep = ":")]
parallel_use[, year_f := factor(election_year, levels = c(1860, 1856, 1858, seq(1862,1880,2)))]

#Get mean GOP vs within quantiles
#Dropping any 0 GOP voteshare counties
plot_data = parallel_use[, list(
  gop_vs_alt = mean(gop_vs [gop_vs != 0], na.rm = T),
  m_vet = mean(vet_alt_pct, na.rm = T),
  N = .N), by = list(vet_q, election_year) ]

#State-year Centered on 1860
plot_data[, gop_vs_alt_c := gop_vs_alt - gop_vs_alt[election_year %in% 1860], by = vet_q]

#Plot parallel trends
war = data.frame(start = 1861 + 4/12,
                 end = 1865 + 4/12)
a = ggplot(plot_data[!is.na(vet_q)], aes(x = election_year, y = gop_vs_alt, group = vet_q, colour = vet_q)) + 
  geom_line() + theme_bw() + 
  scale_color_grey(name = "Enlistment\nQuartile") +
  xlab("Election Year") + 
  ylab("Republican Voteshare") +
  ggtitle("Raw Voteshare") 

b = ggplot(plot_data[!is.na(vet_q)], aes(x = election_year, y = gop_vs_alt_c, group = vet_q, colour = vet_q)) + 
  geom_line() + theme_bw() + 
  scale_color_grey(name = "Enlistment\nQuartile") +
  xlab("Election Year") + 
  ylab("Republican Voteshare") + 
  ggtitle("Difference from 1860 Baseline")

out = ggarrange(a,b, 
                labels = c("A", "B"),
                ncol = 1, nrow = 2, 
                common.legend = T, legend = 'bottom')
ggsave('./output/paper/Figure_1.pdf', out, width = 8.5, height = 8)

#########
#Table 1#
#########

#Estimate for years 1854 to 1880
merged_use = merged_panel[(sample) & election_year %in% 1854:1880]

#Continuous Difference in Differences:
#All counties
dd1 = felm(gop_vs ~ postbellum:vet_alt_pct  | target_fips + state_election  | 0 | target_fips + election_year ,
           merged_use) 

#Drop counties with GOP votes that are 0.
dd2 = felm(gop_vs ~ postbellum:vet_alt_pct  | target_fips + state_election  | 0 | target_fips + election_year ,
           merged_use[!(any_na)]) 

#Dummy for county year with no GOP votes
dd3 = felm(gop_vs ~ postbellum:vet_alt_pct + gop_vs_na | target_fips + state_election  | 0 | target_fips + election_year ,
           merged_use) 

#Make Table
model_list = list(dd1, dd2, dd3)

dd = stargazer(model_list,
               type = 'latex',
               keep.stat = 'n',
               dep.var.labels = "Republican Voteshare",
               label = 'tab:gop_dd_table',
               covariate.labels = c('Enlist \\% $\\cdot$ Postbellum', 'No Rep. Candidate'),
               add.lines = list(c('GOP no contest', "included", 'dropped', 'dummy'),
                                c("County FE", rep("X", 3)),
                                c("State-Election FE", rep('X', 3))),
               star.cutoffs = c(0.05,0.01,0.001),
               order = c(2,1),
               title = "Difference-in-Differences Estimate of Effect of County Enlistment Rate on Republican Voteshare"
)
  note = 'Data from Congressional and Presidential elections across 384 counties between 1854 and 1880. Standard errors clustered by county and election year. Counties with election cycles in which the GOP does not contest an election cycle either treated as a $0$, the election is marked with a dummy, or the all observations for that county are dropped.'
  write_latex(dd, note, './output/paper/Table_1.tex')
  
##########
#Table A2#
##########

#Replicate Table 1, using surviving veterans
#Estimate for years 1854 to 1880
merged_use = merged_panel[(sample) & election_year %in% 1854:1880]

dd1_s = felm(gop_vs ~ postbellum:vet_s_pct  | target_fips + state_election  | 0 | target_fips + election_year ,
             merged_use) 

dd2_s = felm(gop_vs ~ postbellum:vet_s_pct  | target_fips + state_election  | 0 | target_fips + election_year ,
             merged_use[!(any_na)]) 

dd3_s = felm(gop_vs ~ postbellum:vet_s_pct + gop_vs_na | target_fips + state_election  | 0 | target_fips + election_year ,
             merged_use) 

#Make Table
model_list = list(dd1_s, dd2_s, dd3_s)

dd_s = stargazer(model_list,
                 type = 'latex',
                 keep.stat = 'n',
                 dep.var.labels = "Republican Voteshare",
                 label = 'tab:gop_dd_survived_table',
                 covariate.labels = c('Survived \\% $\\cdot$ Postbellum', 'No. Rep. Candidate'),
                 add.lines = list(c('GOP no contest', "included", 'dropped', 'dummy'),
                                  c("County FE", rep("X", 3)),
                                  c("State-Election FE", rep('X', 3))),
                 star.cutoffs = c(0.05,0.01,0.001),
                 order = c(2,1),
                 title = "Difference-in-Differences Estimate of Effect of County Enlistment Rate (Survivors Only) on Republican Voteshare"
)
note = 'Data from Congressional and Presidential elections across 293 counties between 1854 and 1880. Standard errors clustered by county and election year.  Counties with election cycles in which the GOP does not contest an election cycle either treated as a $0$, the election is marked with a dummy, or the all observations for that county are dropped.'
write_latex(dd_s, note, './output/appendix/Table_A2.tex')

###########
#Figure A1#
###########

#Estimate for years 1854 to 1920
merged_use = merged_panel[(sample) & election_year %in% 1854:1920]

#All Counties in which GOP ran between 1854 and 1860
g = felm(gop_vs ~ 1 + year_f*vet_alt_pct  | target_fips + state_election | 0 | target_fips + election_year ,
          merged_use[!(any_na)]) 


plot.raw = summary(g)$coefficients
plot.idx = rownames(plot.raw) %>% str_detect('year_f\\d{4}:vet_alt_pct')
plot.year = rownames(plot.raw)[plot.idx] %>% str_extract('\\d{4}') %>% as.numeric
plot.data = data.table(year = plot.year, t = plot.raw[plot.idx,1], lower = plot.raw[plot.idx,1]-1.96*plot.raw[plot.idx,2], upper = plot.raw[plot.idx,1]+1.96*plot.raw[plot.idx,2])
plot.data = rbind(plot.data, data.table(year= 1860, t = 0), fill = T)

p = ggplot(plot.data, 
       aes(x = year, y = t, ymin = lower, ymax = upper)) +
  geom_point() +
  geom_linerange() + 
  geom_hline(yintercept = 0, colour = 'red') +
  xlab("Election Year") +
  ylab("Effect of Enlistment Rate") +
  geom_rect(data= data.frame(Start = 1861 + 4/12, End = 1865 + 4/12), inherit.aes = FALSE,
            aes(xmin=Start, xmax=End, ymin=-Inf, ymax=+Inf), 
            fill='gray', alpha=0.2) + 
  theme_bw() + 
  scale_x_continuous(breaks = seq(1856,1920,4))

ggsave('./output/appendix/Figure_A1.pdf', p, width = 8.5, height = 5.5)


#Full table
note = 'Data from Congressional and Presidential elections across 384 counties between 1854 and 1880. Standard errors clustered by county and election year. Counties with election cycles in which the GOP does not contest an election cycle are dropped.'

rename_explanator <- function(old_names) {
  new_names <- gsub("year_f", "Year ", old_names)
  new_names <- gsub('vet_alt_pct', "Enlistment Rate", new_names)
  new_names <- gsub(':', " * ", new_names)
  setNames(new_names, old_names)
}

table_out = modelsummary(g, stars = T, 
             coef_rename = rename_explanator, 
             notes = note,
             title = "Effect of veterans on Republican Vote-share in federal elections for each year
between 1854 and 1920",
             coef_omit = "(year_f\\d\\d\\d\\d$)|(^vet_alt_pct$)")

table_out = data.table(table = table_out, name = "Figure A1")
table_list = c(table_list, list(table_out))


################
#Figures A5, A6#
################

#Estimate for years 1856 to 1860, 1864 to 1868
merged_use = merged_panel[(sample) & election_year %in% c(1856:1860, 1864:1868)]

#Collapse by county (keep only counties with non missing election values)
merged_use = merged_use[!(gop_vs_na), list(gop_vs_post = mean(gop_vs[postbellum==1], na.rm = T), 
                                           gop_vs_pre = mean(gop_vs[postbellum==0], na.rm = T)
                                          ), 
                                      by = list(target_state, target_name, vet_s_pct)]

#Merge in mean experiences of surviving soldiers
setkey(merged_use, target_state, target_name)
setkey(enlistment, state_1860, county_1860)
merged_use = enlistment[merged_use]

#difference in GOP voting
merged_use[, diff := gop_vs_post - gop_vs_pre]

n_co = merged_use[!is.na(gop_vs_post) & !is.na(gop_vs_pre) & !is.na(vet_s_pct) & !is.na(company_casualty_rate)] %>% nrow

#Prepare interactions
conflict_vars = c('company_casualty_rate', 'regiment_casualty_rate')
conflict_labs = c("Mean Company Casualty Rate", "Mean Regimental Casualty Rate")

interactions = data.frame(
  x = conflict_vars,
  lab =  conflict_labs,
  type = rep('conflict', length(conflict_vars)),
  stringsAsFactors = F
)

#Loop over interactions
# Estimate LDV/Diff
i_plots = vector('list', length = nrow(interactions))

for (r in 1:nrow(interactions)) {
  
  #Get interactive variable
  x_var = interactions$x[r]
  d_var = "vet_s_pct"
  z_diff = NULL
  z_ldv = 'gop_vs_pre'
  y_ldv = 'gop_vs_post'
  y_diff = 'diff'
  fe = 'state_1860'
  #LDV binning
  a = inter.binning(Y = y_ldv, D = d_var, X = x_var, Z = z_ldv, FE = fe, 
                    data = merged_use, na.rm = T, vartype = 'robust',
                    main = "Lagged DV", theme.bw = T, full.moderate = F,
                    Xlabel = conflict_labs[r])
  
  #Diff bin
  b = inter.binning(Y = y_diff, D = d_var, X = x_var, Z = z_diff, FE = fe, 
                    data = merged_use, na.rm = T, vartype = 'robust',
                    main = "Differenced", theme.bw = T, full.moderate = F,
                    Xlabel = conflict_labs[r])
  
  
  ylab = "Marginal Effect of Enlistment (%)"
  xlab = paste("Moderator:",interactions$lab[r])
  
  out_list =  list(a$graph, b$graph)
  for (i in seq_along(out_list)) {
    out_list[[i]]$labels$x = NULL
    out_list[[i]]$labels$y = NULL
    out_list[[i]]$theme$plot.title$size = 15
  }
  
  #Generate figure:
  
  i_plots[[r]] = grid.arrange( 
    arrangeGrob(grobs = out_list, 
                bottom=textGrob(xlab, gp=gpar(fontface="bold", fontsize=15)), 
                left=textGrob(ylab, gp=gpar(fontface="bold", fontsize=15), rot = 90), nrow = 1, ncol = 2) 
  )

  #Generate table:
  out_table = rbind(a$est.bin, b$est.bin) %>% .[, -1] %>% as.data.frame() %>% round(3)
  names(out_table)[1] = "Enlistment Rate"
  
  note = paste("Marginal effect of County Enlistment Rate (survivors only) on Republican Voteshare in 1864-1868 elections, across ", conflict_labs[r], ", conditional on state fixed effects. Sample includes ", n_co, " counties, and excludes Indiana for which we cannot calculate mean casualty rates. Standard errors are robust (HC1).", sep = "")
  title = paste0("Marginal Effect of Enlistment Rates on Republican Voteshare Conditional on ", interactions$lab[r])
  table_out = kable(out_table, caption = title) %>% 
    pack_rows(index = c("Lagged DV" = 3, "Difference-in-Difference" = 3)) %>%
    kable_styling() %>%
    add_footnote(note)
  
  table_out = data.table(table = table_out, name = paste0("Figure A", 4+r))
  table_list = c(table_list, list(table_out))
  
}

ggsave('./output/appendix/Figure_A5.pdf', i_plots[[1]], height = 4.5, width = 8.5)
ggsave('./output/appendix/Figure_A6.pdf', i_plots[[2]], height = 4.5, width = 8.5)


########################
#Robustness of Table A1#
########################

#Create Covariate variables
###########################

#Get unique id, treatment, outcome variable names
post_war_election_vars = names(elections) %>% str_detect('^vs_gop_alt') %>% names(elections)[.]
pre_war_election_vars = names(elections) %>% str_detect('^pre_vs_') %>% names(elections)[.]
x_vars = c('vet_pct', 'vet_alt_pct', names(enlistment))
location_vars = c("target_state", "target_name", "target_fips", 'i.target_fips', 'statefip_1860')

#Get possible covariates
X = merged[, .SD, .SDcols = setdiff(names(merged), c(x_vars, location_vars, post_war_election_vars))]
#Get unique id, treatment, outcome variables
XD = merged[, .SD, .SDcols = intersect(c(x_vars, location_vars, post_war_election_vars), names(merged))]
#Get state dummies
stateX = model.matrix( ~ 0 + target_state, merged) 

#Create 1860 census covariates
X[, l_totpop_1860 := log(totpop_1860 + 1)]
X[, per_capita_mil_age_male := mil_age_male_1860 / totpop_1860]
X[, male_female_ratio := mil_age_male_1860 / mil_age_female_1860]
X[, l_mfgout := log(mfgout_1860 + 1)]
X[, l_mfgout_per_cap := log(per_capita_mfgout_1860 + 1)]
X[, pct_abolition_denom := abolition_acc_total_1860/totpop_1860]
X[, l_agout := log(agout3_1860 + 1)]
X[, l_agout_per_capita := log(per_capita_agout3_1860 + 1)]
X[, l_agout_per_acre := log(per_acre_i_agout3_1860 + 1)]
X[, per_female_mfglabf_1860 := mfglabf_1860 / ftot_1860]

#Create Republican VS trend
gop_fed_vars = names(X) %>% str_detect(., '^pre_vs_gop_alt_\\d{4}_((PRES$)|(CONG$))') %>% names(X)[.]

#find counties with more than 2 years of election returns
idx = X[, unlist(.SD) %>% is.na %>% `!`  %>% sum %>% `>` (2), .SDcols = gop_fed_vars, by = 1:nrow(X)]$V1
X[, i := 1:nrow(X)]
X[idx,   c('gop_fed_trend_1', 'gop_fed_trend_2') := {a = .SD %>% melt.data.table(measure.vars = gop_fed_vars, variable.factor = F);
a$year = str_extract(a$variable, "\\d{4}") %>% as.numeric;
b = lm(value ~ I(year - 1857) + I((year - 1857)^2), a) %>% coef;
list(gop_trend = b[2], gop_trend_2 = b[3])}  , .SDcols = gop_fed_vars, by = i]
X[, i := NULL]
#Function to set variables as missing
set_NA = function(x) {
  x[is.infinite(x) | is.nan(x)] = NA
  return(x)
}

#Set all infinite, NaN columns NA
X = X[, lapply(.SD, set_NA)]

#Create missing data indicators, set missing values to 0
#for pre-war elections
alloc.col(X, length(pre_war_election_vars)*2)
for (c in pre_war_election_vars) {
  na_idx = X[[c]] %>% is.na
  if (na_idx %>% any) {
    set(X,
        i = 1:nrow(X),
        j = paste0('NA_', c),
        value = na_idx*1
    )
    set(X,
        i = which(na_idx),
        j = c,
        value = 0
    )
  }
}


#Covariates to use
pre_war_election_vars = names(elections) %>% str_detect('^pre_vs_') %>% names(elections)[.]
newXvars = names(X)[which(str_detect(names(X), "l_totpop_1860")):ncol(X)]
keepVars = c(newXvars, 
             c('per_capita_fbfree_1860', 
              'per_capita_fctot_1860', 
              'per_capita_urb860_1860',
              'per_capita_whtot_1860',
              'pct_southern_born_1860',
              'per_male_mfglabm_1860', 
              'mean_farm_acreage_1860', 
              'per_acre_i_farmval_1860',
              'land_gini_1860',
              'per_acre_i_mil_age_male_1860',
              'per_capita_agout3_1860'))

#Drop unused variables
X = X[, .SD, .SDcols = intersect(c(pre_war_election_vars, keepVars, paste0("NA_", pre_war_election_vars)), names(X))]

#Combine covariates/treatment/dv
useTable = cbind(XD, X, stateX)
useTable[, use_vet := !is.na(vet_alt_pct) & vet_alt_pct %between% c(0,1)]

#Define variable groups
#######################

#gop pre-war election variables
pw_gop_trend_federal = c('gop_fed_trend_1', 'gop_fed_trend_2')
pw_gop_vote_federal = names(X)[str_detect(names(X), "pre_vs_gop_alt_\\d{4}_((CONG)|(PRES))")]
pw_gop_vote_all = names(X)[str_detect(names(X), "pre_vs_gop_alt")]

#whig election variables
pw_whig_federal = names(X)[str_detect(names(X), "pre_vs_0029_18[456]\\d_((CONG)|(PRES))")]
pw_whig_all = names(X)[str_detect(names(X), "pre_vs_0029_18[456]\\d")]

#antislavery
pw_antislavery_federal = names(X)[str_detect(names(X), "pre_vs_antislavery_18[456]\\d_((CONG)|(PRES))")]
pw_antislavery_all = names(X)[str_detect(names(X), "pre_vs_antislavery_18[456]\\d")]

#freesoil
pw_freesoil_federal = names(X)[str_detect(names(X), "pre_vs_freesoil_18[456]\\d_((CONG)|(PRES))")]
pw_freesoil_all = names(X)[str_detect(names(X), "pre_vs_freesoil_18[456]\\d")]

#liberty
pw_liberty_federal = names(X)[str_detect(names(X), "pre_vs_liberty_18[456]\\d_((CONG)|(PRES))")]
pw_liberty_all = names(X)[str_detect(names(X), "pre_vs_liberty_18[456]\\d")]

#american
pw_american_federal = names(X)[str_detect(names(X), "pre_vs_american_18[456]\\d_((CONG)|(PRES))")]
pw_american_all = names(X)[str_detect(names(X), "pre_vs_american_18[456]\\d")]

#democrats
pw_dem_federal = names(X)[str_detect(names(X), "pre_vs_0100_18[456]\\d_((CONG)|(PRES))")]
pw_dem_all = names(X)[str_detect(names(X), "pre_vs_0100_18[456]\\d")]

#CU
pw_cu_federal = names(X)[str_detect(names(X), "pre_vs_0037_\\d{4}_((CONG)|(PRES))")]
pw_cu_all = names(X)[str_detect(names(X), "pre_vs_0037_\\d{4}")]

#covariates
covars_1860 = setdiff(keepVars, pre_war_election_vars)
covars_1860 = covars_1860[!str_detect(covars_1860, "(trend)|(NA)")]

#states
covars_states = colnames(stateX)

#Dependent variables
dvs = post_war_election_vars[str_detect(post_war_election_vars, "(PRES)|(CONG)")]

#Treatment
d = "vet_alt_pct"

###########
#Figure A2#
###########

x_vars= c(covars_1860,
          pw_gop_vote_federal,pw_gop_trend_federal,
          pw_american_federal, pw_liberty_federal, pw_freesoil_federal, 
          pw_dem_federal, pw_whig_federal)

#Temp Variables
temp = useTable[(use_vet), .SD, .SDcols = c(x_vars, d,  covars_states, 'target_state', 'target_name')]

#Predicting enlistment with GOP voteshare
mean_gop = temp[, .SD  %>% as.matrix %>% rowMeans(), .SDcols = pw_gop_vote_federal[1:6]]
mean_gop_na = temp[, .SD  %>% as.matrix %>% rowMeans, .SDcols = pw_gop_vote_federal[7:12]]
mean_american = temp[, .SD  %>% as.matrix %>% rowMeans, .SDcols = pw_american_federal[1:5]]
mean_american_na = temp[, .SD  %>% as.matrix %>% rowMeans, .SDcols = pw_american_federal[10:14]]
mean_liberty = temp[, .SD  %>% as.matrix %>% rowMeans, .SDcols = pw_liberty_federal[1:6]]
mean_liberty_na = temp[, .SD  %>% as.matrix %>% rowMeans, .SDcols = pw_liberty_federal[7:12]]
mean_freesoil = temp[, .SD  %>% as.matrix %>% rowMeans, .SDcols = pw_freesoil_federal[1:6]]
mean_freesoil_na = temp[, .SD  %>% as.matrix %>% rowMeans, .SDcols = pw_freesoil_federal[7:12]]
mean_dem = temp[, .SD  %>% as.matrix %>% rowMeans, .SDcols = pw_dem_federal[13:17]]
mean_dem_na = temp[, .SD  %>% as.matrix %>% rowMeans, .SDcols = pw_dem_federal[29:34]]

x0 = temp[, .SD, .SDcols = c(covars_1860, pw_gop_trend_federal)] %>% as.matrix
x0 = cbind(x0, 
           mean_gop, mean_gop_na, 
           mean_american, mean_american_na,
           mean_liberty,
           mean_freesoil, mean_freesoil_na,
           mean_dem, mean_dem_na)

s0 = temp[, .SD, .SDcols = covars_states] %>% as.matrix
w0 = temp[[d]]

m_vars = c(covars_1860, pw_gop_trend_federal, 
           'mean_gop', "mean_gop_na", 
           'mean_american', 'mean_american_na',
           'mean_liberty',
           'mean_freesoil', 'mean_freesoil_na',
           'mean_dem', 'mean_dem_na')

coef_xd = data.table(term = m_vars, 
                     estimate = as.numeric(NA), 
                     std.error = as.numeric(NA), 
                     statistic = as.numeric(NA), 
                     p.value = as.numeric(NA))
trimmed = matrix(NA, ncol = length(m_vars), nrow = nrow(x0))

#Loop over covariates
model_out = vector('list', length = length(m_vars))

for (i in seq_along(m_vars)) {
  trim = quantile(x0[,i], probs = c(0.005, 0.995), na.rm = T)
  idx = x0[,i] >= trim[1] & x0[,i] <= trim[2]
  trimmed[, i] = idx
  sx_sy = sd(x0[idx,i], na.rm = T) / sd(w0[idx], na.rm = T)
  m_i = lm(w0[idx] ~ x0[idx,i] + s0[idx,])
  model_out[[i]] = m_i
  beta = m_i$coefficients[2] * (sx_sy)
  se_1 = coeftest(m_i, vcov. = vcovHC(m_i, "HC1")) %>% .[2,2]  %>% `*` (sx_sy)
  se_2 = coeftest(m_i) %>% .[2,2] %>%  `*` (sx_sy)
  p_1 = coeftest(m_i, vcov. = vcovHC(m_i, "HC1")) %>% .[2,4] 
  p_2 = coeftest(m_i) %>% .[2,4]
  coef_xd[i, c('estimate', 'std.error', 'statistic', 'p.value') := 
            list(beta, max(c(se_1, se_2)), beta / max(c(se_1, se_2)), max(c(p_1, p_2)))
          ]
}

#Prep for plot
x_labs = c("Population (logged)",
           "Fraction Military-Age Males",
           "Males/Females Ratio",
           "Manuf. output (logged)",
           "Per capita Manuf. output (logged)",
           "Fraction Abolitionist Church Members",
           "Agr. output (logged)",
           "Per Capita Agr. output (logged)",
           "Agr. output per acre (logged)",
           "Fraction Females in Manuf.",
           "Fraction Foreign Born",
           "Fraction Free Black",
           "Fraction Urban",
           "Fraction White",
           "Fraction Southern-Born",
           "Fraction Males in Manuf.",
           "Mean Farm Acreage",
           "Average Value of Farm Acreage",
           "Farm Acreage Gini Coefficient",
           "Military-Age Males per Acre Farmland",
           "Per Capita Agr. output (unlogged)",
           "Prewar Republican Voteshare Time Trend (linear)",
           "Prewar Republican Voteshare Time Trend (quadratic)",
           "Mean Prewar Republican Voteshare",
           "Mean Prewar Republican Not on Ballot",
           "Mean Prewar American Party Voteshare",
           "Mean Prewar American Party Not on Ballot",
           "Mean Prewar Liberty Party Voteshare",
           "Mean Prewar Freesoil Party Voteshare",
           "Mean Prewar Freesoil Party Not on Ballot",
           "Mean Prewar Democratic Party Voteshare",
           "Mean Prewar Democratic Party Not on Ballot"
)
coef_xd$term = x_labs

setkey(coef_xd, estimate)
p = dwplot(coef_xd %>% as.data.frame(), 
           vline = geom_vline(xintercept = 0, colour = "grey60", linetype = 2)) + 
  theme_bw() + xlab("Standardized Coefficient Estimate") + ylab("") +
  theme(legend.position = "none")
ggsave("./output/appendix/Figure_A2.pdf", p, height = 11, width = 8.5)


#Full table results
note = 'Data from 384 counties. Unstandardized coefficients from regression enlistment rate on covariates. HC1 Standard Errors. Trimming counties with covariate values in top and bottom 0.5 percentiles.'

rename_explanator <- function(old_names) {
  new_names <- gsub("x0[idx, i]", "Covariate", old_names, fixed = T)
  new_names <- gsub('s0[idx, ]target_state', "", new_names, fixed = T) %>% str_to_title()
  setNames(new_names, old_names)
}
names(model_out) = x_labs

table_out = modelsummary(model_out, stars = T, 
                         coef_rename = rename_explanator, 
                         notes = note,
                         title = "Predictors of County Enlistment Rates (Unstandardized Coeﬀicients)"
                          )

table_out = data.table(table = table_out, name = "Figure A2")
table_list = c(table_list, list(table_out))


##########
#Table A1#
##########

#Identify counties that are not extreme outliers
idx = trimmed %>% rowSums(na.rm = T) %>% `>=` (32)
covars_panel = cbind(temp[, list(target_state, target_name, covars_use = idx)],
                     as.data.table(x0))

merged_use = merged_panel[(sample) & election_year %in% 1854:1880]

#Merge in controls:
setkey(merged_use, target_state, target_name)
setkey(covars_panel, target_state, target_name)
merged_use = covars_panel[merged_use]


#Formulae
f1 = as.formula(paste0("gop_vs ~ ", 
                       paste("postbellum", c('vet_alt_pct',m_vars), sep = ":", collapse = " + "),
                       " | target_fips + state_election | 0 | target_fips + election_year"
)
)
f2 = as.formula(paste0("gop_vs ~ ", 
                       paste("postbellum", c('vet_alt_pct',m_vars), sep = ":", collapse = " + "),
                       " + gop_vs_na | target_fips + state_election | 0 | target_fips + election_year"
)
)

##Using all states available (administrative records where available)
dd1 = felm(f1,
           merged_use[(covars_use)]) 

dd2 = felm(f1 ,
           merged_use[(covars_use) & !(any_na)]) 

dd3 = felm(f2,
           merged_use[(covars_use)]) 

#Write to table
model_list = list(dd1, dd2, dd3)
n_counties = lapply(model_list, function(x) x$clustervar$target_fips %>% unique %>% length) %>% unlist

dd = stargazer(model_list,
               type = 'latex',
               keep.stat = c('n', 'rsq'),
               dep.var.labels = "Republican Voteshare",
               keep = 'postbellum:vet_alt_pct',
               label = 'tab:gop_dd_table_covars',
               covariate.labels = 'Enlist \\% $\\cdot$ Postbellum',
               add.lines = list(c('GOP no contest', "included", 'dropped', 'dummy'),
                                c("County FE", rep("X", 3)),
                                c("State-Election FE", rep('X', 3)),
                                c("Covars $\\cdot$ Postbellum", rep("X",3)),
                                c("N Counties", n_counties)),
               star.cutoffs = c(0.05,0.01,0.001),
               title = "Difference-in-Differences Estimate (With Time-Interacted Controls) of Effect of County Enlistment Rate on Republican Voteshare"
)
note = 'Data from Congressional and Presidential elections across 330 counties between 1854 and 1880. Standard errors clustered by county and election year. Counties with election cycles in which the GOP does not contest an election cycle either treated as a $0$, the election is marked with a dummy, or the all observations for that county are dropped. The post-war indicator is interacted with 32 demographic, economic, and political covariates. Counties with missing data or extreme outliers in the covariates are dropped.'
write_latex(dd, note, './output/appendix/Table_A1.tex')


#Full table results

rename_explanator <- function(old_names) {
  new_names <- gsub("postbellum", "Postbellum", old_names)
  new_names <- gsub('vet_alt_pct', "Enlistment Rate", new_names)
  new_names <- gsub(':', " * ", new_names)
  new_names <- gsub('gop_vs_naTRUE', "No Rep. Candidate", new_names)
  for (i in seq_along(m_vars)) {
    new_names <- gsub(m_vars[i], x_labs[i], new_names)
  }
  setNames(new_names, old_names)
}


table_out = modelsummary(model_list, stars = T, 
                         coef_rename = rename_explanator, 
                         notes = note,
                         add_rows = rbind(c('GOP no contest', "included", 'dropped', 'dummy'),
                                          c("County FE", rep("X", 3)),
                                          c("State-Election FE", rep('X', 3)),
                                          c("Covars $\\cdot$ Postbellum", rep("X",3)),
                                          c("N Counties", n_counties)) %>% as.data.frame,
                         title = "Difference-in-Differences Estimate (With Time-Interacted Controls) of Effect of County Enlistment Rate on Republican Voteshare"
            )

table_out = data.table(table = table_out, name = "Table A1")
table_list = c(table_list, list(table_out))


###########
#Figure A3#
###########
  
#Formula
f1 = as.formula(paste0("gop_vs ~ 1 + ", 
                       paste("year_f", c('vet_alt_pct',m_vars), sep = "*", collapse = " + "),
                       " | target_fips + state_election | 0 | target_fips + election_year"
                ))

#Estimate
g = felm(f1 ,
          merged_use[!(any_na) & (covars_use)]) 

#Plot data
plot.raw = summary(g)$coefficients
plot.idx = rownames(plot.raw) %>% str_detect('year_f\\d{4}:vet_alt_pct')
plot.year = rownames(plot.raw)[plot.idx] %>% str_extract('\\d{4}') %>% as.numeric
plot.data = data.table(year = plot.year, t = plot.raw[plot.idx,1], lower = plot.raw[plot.idx,1]-1.96*plot.raw[plot.idx,2], upper = plot.raw[plot.idx,1]+1.96*plot.raw[plot.idx,2])
plot.data = rbind(plot.data, data.table(year= 1860, t = 0), fill = T)

#Plot
p = ggplot(plot.data, 
       aes(x = year, y = t, ymin = lower, ymax = upper)) +
  geom_point() +
  geom_linerange() + 
  geom_hline(yintercept = 0, colour = 'red') +
  xlab("Election Year") +
  ylab("Effect of Enlistment Rate") +
  geom_rect(data= data.frame(Start = 1861 + 4/12, End = 1865 + 4/12), inherit.aes = FALSE,
            aes(xmin=Start, xmax=End, ymin=-Inf, ymax=+Inf), 
            fill='gray', alpha=0.2) + 
  theme_bw() + 
  scale_x_continuous(breaks = seq(1856,1880,4))
ggsave('./output/appendix/Figure_A3.pdf', p, width = 8.5, height = 5.5)


#Full Table
rename_explanator <- function(old_names) {
  new_names <- gsub("year_f", "Year ", old_names)
  new_names <- gsub('vet_alt_pct', "Enlistment Rate", new_names)
  new_names <- gsub(':', " * ", new_names)
  new_names <- gsub('gop_vs_naTRUE', "No Rep. Candidate", new_names)
  for (i in seq_along(m_vars)) {
    new_names <- gsub(m_vars[i], x_labs[i], new_names)
  }
  setNames(new_names, old_names)
}

note = 'Data from Congressional and Presidential elections across 257 counties between 1854 and 1880. Standard errors clustered by county and election year. Counties with election cycles in which the GOP does not contest an election cycle  are dropped. The year indicators, with 1860 as the reference, are interacted with 32 demographic, economic, and political covariates. Counties with missing data or extreme outliers in the covariates are dropped.'


table_out = modelsummary(g, stars = T, 
                         coef_rename = rename_explanator, 
                         notes = note,
                         title = "Effect of veterans on Republican Vote-share in federal elections for each year
between 1854 and 1920, with Covariate-Time interactions"
)

table_out = data.table(table = table_out, name = "Figure A3")
table_list = c(table_list, list(table_out))


###########
#Figure A4#
###########

#Fuction for robust SE
vcovHC1 = function(x) {vcovHC(x, "HC1")}

#Function for GRF average partial effect
cf_ape = function(X, Y, W, num.trees = 2000, sample.fraction = NULL) {
  y.fit = regression_forest(X = X, 
                            Y = Y, 
                            tune.parameters = 'all',
                            num.trees = num.trees)
  w.fit = regression_forest(X = X, 
                            Y = W, 
                            tune.parameters = 'all',
                            num.trees = num.trees)
  cf = causal_forest(X = X, 
                     Y = Y,
                     W = W,
                     Y.hat = predict(y.fit)$predictions,
                     W.hat = predict(w.fit)$predictions,
                     tune.parameters = 'all',
                     num.trees = num.trees)
  return(average_partial_effect(cf))
}

#Average Partial Effects
temp = useTable[(use_vet)]

#Create 1860 GOP voteshare as baseline
temp[, gop_1860 := (pre_vs_gop_alt_1860_PRES + pre_vs_gop_alt_1860_CONG)/2]

#State dummies
temp_states = temp[(use_vet), list(target_state)]

#Estimate DD effects against 1860
#################################

#list of covariates
spec_vars = c(pw_gop_vote_federal, covars_1860, covars_states)

#Save yearly dd estimates
effects_list = vector('list', length = length(dvs))

#loop over post 1861 election outcomes
for (y in dvs[1:15]) {
    #subset data
    idx0 = temp[, .SD, .SDcols = c(spec_vars, y, d, 'gop_1860')] %>% complete.cases
    temp0 = temp[idx0]
    states0 = temp_states$target_state[idx0]
    
    #create data for estimation
    x0 = temp0[, .SD, .SDcols = spec_vars] %>% as.matrix
    w0 = temp0[[d]]
    y0 = temp0[[y]]
    y_1 = temp0[['gop_1860']]
    
    #State centering
    x1 = demeanlist(mtx = x0, fl = list(as.factor(states0)))
    w1 = demeanlist(mtx = w0, fl = list(as.factor(states0)))
    y1 = demeanlist(mtx = y0, fl = list(as.factor(states0)))
    y_1 = demeanlist(mtx = y_1, fl = list(as.factor(states0)))
    
    #Estimate GRF APE, LM
    grf_1 = cf_ape(x1, y1 - y_1, w1)
    lm_1 = lm(y1 - y_1 ~ w1 + x1) %>% coeftest(vcov.=vcovHC1) %>% .[2,1:2]
    
    effects_list[[match(y,dvs)]] = rbind(grf_1, 
                                         lm_1) %>% 
      as.data.table %>% 
      cbind(model = c('grf', 'lm'), 
            dv = y, dv_type = 'effect', 
            centering = 'states',
            n = sum(idx0))
    cat(y)
    cat('\n')
  }
  
effects_table = rbindlist(effects_list)
  
#loop over pre 1861 election outcomes
placebo_dvs = pw_gop_vote_federal[1:6]
placebo_list = vector('list', length = length(placebo_dvs))
  
for (y in placebo_dvs) {
    #subset data
    idx0 = temp[, .SD, .SDcols = c(spec_vars, y, d, 'gop_1860')] %>% complete.cases
    temp0 = temp[idx0]
    states0 = temp_states$target_state[idx0]
    
    #create data for estimation
    x0 = temp0[, .SD, .SDcols = spec_vars] %>% as.matrix
    w0 = temp0[[d]]
    y0 = temp0[[y]]
    y_1 = temp0[['gop_1860']]
    
    if (length(table(y0)) > 1) {
      
      #State centering
      x1 = demeanlist(mtx = x0, fl = list(as.factor(states0)))
      w1 = demeanlist(mtx = w0, fl = list(as.factor(states0)))
      y1 = demeanlist(mtx = y0, fl = list(as.factor(states0)))
      y_1 = demeanlist(mtx = y_1, fl = list(as.factor(states0)))
      
      #Estimate partial effects
      grf_1 = cf_ape(x1, y1 - y_1, w1)
      lm_1 = lm(y1 - y_1 ~ w1 + x1) %>% coeftest(vcov.=vcovHC1) %>% .[2,1:2]
      
      #save estimates
      placebo_list[[match(y,placebo_dvs)]] = rbind(grf_1, lm_1) %>% 
        as.data.table %>% 
        cbind(model = c('grf', 'lm'), 
              dv = y, dv_type = 'placebo', 
              centering = 'states',
              n = sum(idx0))
    }
    
    cat(y)
    cat('\n')
  }
placebo_table = rbindlist(placebo_list)
result_table = rbindlist(list(effects_table, placebo_table))

#Set up plot data
result_table[, election_type := str_extract(dv, "(CONG)|(PRES)")]
result_table[, party_gop := str_detect(dv, "vs_gop")]
result_table[, election_year := str_extract(dv, "\\d{4}(?=(_CONG)|(_PRES))") %>% as.numeric]
plot_data = result_table[(party_gop)]
plot_data[, lower := estimate - 2*std.err ]
plot_data[, upper := estimate + 2*std.err ]

plot_data[, model := str_to_upper(model) %>% paste0("Model: ",.)]

p = ggplot() +
  geom_rect(data=data.frame(xmin= 1861 ,
                            xmax= 1865 ,
                            ymin=-Inf,
                            ymax=Inf),
            aes(xmin=xmin,xmax=xmax,ymin=ymin,ymax=ymax),
            fill="grey",alpha=0.5) +
  geom_vline(xintercept = 1866, color = "gray70") +
  geom_pointrange(data =plot_data[centering %in% 'states' & model %in% "Model: GRF"],
                  aes(x = election_year, y = estimate, ymin = lower, ymax = upper, group = election_type, shape = election_type, color = election_type),
                  position = position_dodge(width = 1)) + 
  geom_hline(yintercept = 0) +
  scale_color_brewer(palette="Set1") +
  facet_grid(model ~ .) + xlab("Year") + ylab("Average Partial Effect") + 
  theme_bw() + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) + 
  scale_x_continuous(breaks = seq(1854, 1880, 2)) + theme(legend.position="bottom")

ggsave("./output/appendix/Figure_A4.pdf", p, height = 6, width = 8)

##############################################################
#Cleanup
rm(list = setdiff(ls(), c(keep, 'keep')))
gc()